{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ML for Trading: How to run an ML algorithm on Quantopian"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The code in this notebook is written for the Quantopian Research Platform and uses the 'Algorithms' rather than the 'Research' option we used before."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To run it, you need to have a free Quantopian account, create a new algorithm and copy the content to the online development environment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports & Settings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Quantopian Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from quantopian.algorithm import attach_pipeline, pipeline_output, order_optimal_portfolio\n",
    "from quantopian.pipeline import Pipeline, factors, filters, classifiers\n",
    "from quantopian.pipeline.data.builtin import USEquityPricing\n",
    "from quantopian.pipeline.data import Fundamentals\n",
    "from quantopian.pipeline.data.psychsignal import stocktwits\n",
    "from quantopian.pipeline.factors import (Latest, \n",
    "                                         CustomFactor, \n",
    "                                         SimpleMovingAverage, \n",
    "                                         AverageDollarVolume, \n",
    "                                         Returns, \n",
    "                                         RSI, \n",
    "                                         SimpleBeta,                                         \n",
    "                                         MovingAverageConvergenceDivergenceSignal as MACD)\n",
    "from quantopian.pipeline.filters import QTradableStocksUS\n",
    "from quantopian.pipeline.experimental import risk_loading_pipeline, Size, Momentum, Volatility, Value, ShortTermReversal\n",
    "\n",
    "import quantopian.optimize as opt\n",
    "from quantopian.optimize.experimental import RiskModelExposure"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Other Python Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.stats import spearmanr\n",
    "import talib\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from time import time\n",
    "from collections import OrderedDict\n",
    "\n",
    "from scipy import stats\n",
    "from sklearn import linear_model, preprocessing, metrics, cross_validation\n",
    "from sklearn.pipeline import make_pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Strategy Positions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# strategy parameters\n",
    "N_POSITIONS = 100 # Will be split 50% long and 50% short\n",
    "TRAINING_PERIOD = 126 # past periods for training\n",
    "HOLDING_PERIOD = 5 # predict returns N days into the future\n",
    "\n",
    "# How often to trade, for daily, alternative is date_rules.every_day()\n",
    "TRADE_FREQ = date_rules.week_start()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Custom Universe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We define a custom universe to limit duration of training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Q250US():\n",
    "    \"\"\"Define custom universe\"\"\"\n",
    "    return filters.make_us_equity_universe(\n",
    "        target_size=250,\n",
    "        rankby=factors.AverageDollarVolume(window_length=200),\n",
    "        mask=filters.default_us_equity_universe_mask(),\n",
    "        groupby=classifiers.fundamentals.Sector(),\n",
    "        max_group_weight=0.3,\n",
    "        smoothing_func=lambda f: f.downsample('month_start'),\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Alpha Factors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_alpha_factors():\n",
    "    \n",
    "    def PriceToSalesTTM():\n",
    "        \"\"\"Last closing price divided by sales per share\"\"\"        \n",
    "        return Fundamentals.ps_ratio.latest\n",
    "\n",
    "    def PriceToEarningsTTM():\n",
    "        \"\"\"Closing price divided by earnings per share (EPS)\"\"\"\n",
    "        return Fundamentals.pe_ratio.latest\n",
    "    \n",
    "    def DividendYield():\n",
    "        \"\"\"Dividends per share divided by closing price\"\"\"\n",
    "        return Fundamentals.trailing_dividend_yield.latest\n",
    "    \n",
    "    def Capex_To_Cashflows():\n",
    "        return (Fundamentals.capital_expenditure.latest * 4.) / \\\n",
    "            (Fundamentals.free_cash_flow.latest * 4.)\n",
    "        \n",
    "    def EBITDA_Yield():\n",
    "        return (Fundamentals.ebitda.latest * 4.) / \\\n",
    "            USEquityPricing.close.latest        \n",
    "\n",
    "    def EBIT_To_Assets():\n",
    "        return (Fundamentals.ebit.latest * 4.) / \\\n",
    "            Fundamentals.total_assets.latest\n",
    "               \n",
    "    def Return_On_Total_Invest_Capital():\n",
    "        return Fundamentals.roic.latest\n",
    "    \n",
    "    class Mean_Reversion_1M(CustomFactor):\n",
    "        inputs = [Returns(window_length=21)]\n",
    "        window_length = 252\n",
    "\n",
    "        def compute(self, today, assets, out, monthly_rets):\n",
    "            out[:] = (monthly_rets[-1] - np.nanmean(monthly_rets, axis=0)) / \\\n",
    "                np.nanstd(monthly_rets, axis=0)\n",
    "                \n",
    "    def MACD_Signal():\n",
    "        return MACD(fast_period=12, slow_period=26, signal_period=9)\n",
    "           \n",
    "    def Net_Income_Margin():\n",
    "        return Fundamentals.net_margin.latest           \n",
    "\n",
    "    def Operating_Cashflows_To_Assets():\n",
    "        return (Fundamentals.operating_cash_flow.latest * 4.) / \\\n",
    "            Fundamentals.total_assets.latest\n",
    "\n",
    "    def Price_Momentum_3M():\n",
    "        return Returns(window_length=63)\n",
    "    \n",
    "    class Price_Oscillator(CustomFactor):\n",
    "        inputs = [USEquityPricing.close]\n",
    "        window_length = 252\n",
    "\n",
    "        def compute(self, today, assets, out, close):\n",
    "            four_week_period = close[-20:]\n",
    "            out[:] = (np.nanmean(four_week_period, axis=0) /\n",
    "                      np.nanmean(close, axis=0)) - 1.\n",
    "    \n",
    "    def Returns_39W():\n",
    "        return Returns(window_length=215)\n",
    "        \n",
    "    class Vol_3M(CustomFactor):\n",
    "        inputs = [Returns(window_length=2)]\n",
    "        window_length = 63\n",
    "\n",
    "        def compute(self, today, assets, out, rets):\n",
    "            out[:] = np.nanstd(rets, axis=0)\n",
    "            \n",
    "    def Working_Capital_To_Assets():\n",
    "        return Fundamentals.working_capital.latest / Fundamentals.total_assets.latest\n",
    "    \n",
    "    def sentiment():\n",
    "        return SimpleMovingAverage(inputs=[stocktwits.bull_minus_bear],\n",
    "                                    window_length=5).rank(mask=universe)\n",
    "    \n",
    "    class AdvancedMomentum(CustomFactor):\n",
    "        \"\"\" Momentum factor \"\"\"\n",
    "        inputs = [USEquityPricing.close,\n",
    "                  Returns(window_length=126)]\n",
    "        window_length = 252\n",
    "\n",
    "        def compute(self, today, assets, out, prices, returns):\n",
    "            out[:] = ((prices[-21] - prices[-252])/prices[-252] -\n",
    "                      (prices[-1] - prices[-21])/prices[-21]) / np.nanstd(returns, axis=0)\n",
    "            \n",
    "    def SPY_Beta():\n",
    "        return SimpleBeta(target=sid(8554), regression_length=252)\n",
    "\n",
    "    return {\n",
    "        'Price to Sales': PriceToSalesTTM,\n",
    "        'PE Ratio': PriceToEarningsTTM,\n",
    "        'Dividend Yield': DividendYield,\n",
    "        # 'Capex to Cashflows': Capex_To_Cashflows,\n",
    "        # 'EBIT to Assets': EBIT_To_Assets,\n",
    "        # 'EBITDA Yield': EBITDA_Yield,  \n",
    "        'MACD Signal Line': MACD_Signal,\n",
    "        'Mean Reversion 1M': Mean_Reversion_1M,\n",
    "        'Net Income Margin': Net_Income_Margin,        \n",
    "        # 'Operating Cashflows to Assets': Operating_Cashflows_To_Assets,\n",
    "        'Price Momentum 3M': Price_Momentum_3M,\n",
    "        'Price Oscillator': Price_Oscillator,\n",
    "        # 'Return on Invested Capital': Return_On_Total_Invest_Capital,\n",
    "        '39 Week Returns': Returns_39W,\n",
    "        'Vol 3M': Vol_3M,\n",
    "        'SPY_Beta': SPY_Beta,\n",
    "        'Advanced Momentum': AdvancedMomentum,\n",
    "        'Size': Size,\n",
    "        'Volatitility': Volatility,\n",
    "        'Value': Value,\n",
    "        'Short-Term Reversal': ShortTermReversal,\n",
    "        'Momentum': Momentum,\n",
    "        # 'Materials': materials,\n",
    "        # 'Consumer Discretionary': consumer_discretionary,\n",
    "        # 'Financials': financials,\n",
    "        # 'Real Estate': real_estate,\n",
    "        # 'Consumer Staples': consumer_staples,\n",
    "        # 'Healthcare': health_care,\n",
    "        # 'Utilities': utilities,\n",
    "        # 'Telecom ': telecom,\n",
    "        # 'Energy': energy,\n",
    "        # 'Industrials': industrials,\n",
    "        # 'Technology': technology\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Custom Machine Learning Factor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we define a Machine Learning factor which trains a model and predicts forward returns "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ML(CustomFactor):\n",
    "    init = False\n",
    "\n",
    "    def compute(self, today, assets, out, returns, *inputs):\n",
    "        \"\"\"Train the model using \n",
    "        - shifted returns as target, and \n",
    "        - factors in a list of inputs as features; \n",
    "            each factor contains a 2-D array of shape [time x stocks]\n",
    "        \"\"\"\n",
    "        \n",
    "        if (not self.init) or today.strftime('%A') == 'Monday':\n",
    "            # train on first day then subsequent Mondays (memory)\n",
    "            # get features\n",
    "            features = pd.concat([pd.DataFrame(data, columns=assets).stack().to_frame(i) \n",
    "                              for i, data in enumerate(inputs)], axis=1)\n",
    "            \n",
    "            # shift returns and align features\n",
    "            target = (pd.DataFrame(returns, columns=assets)\n",
    "                      .shift(-HOLDING_PERIOD)\n",
    "                      .dropna(how='all')\n",
    "                      .stack())\n",
    "            target.index.rename(['date', 'asset'], inplace=True)\n",
    "            features = features.reindex(target.index)\n",
    "            \n",
    "            # finalize features \n",
    "            features = (pd.get_dummies(features\n",
    "                                       .assign(asset=features\n",
    "                                               .index.get_level_values('asset')), \n",
    "                                       columns=['asset'], \n",
    "                                       sparse=True))\n",
    "                        \n",
    "\n",
    "            # train the model\n",
    "            self.model_pipe = make_pipeline(preprocessing.Imputer(),\n",
    "                                            preprocessing.MinMaxScaler(),\n",
    "                                            linear_model.LinearRegression())\n",
    "\n",
    "            \n",
    "            # run pipeline and train model\n",
    "            self.model_pipe.fit(X=features, y=target)\n",
    "            self.assets = assets # keep track of assets in model\n",
    "            self.init = True\n",
    "\n",
    "        # predict most recent factor values\n",
    "        features = pd.DataFrame({i: d[-1] for i, d in enumerate(inputs)}, index=assets)\n",
    "        features = features.reindex(index=self.assets).assign(asset=self.assets)\n",
    "        features = pd.get_dummies(features, columns=['asset'])  \n",
    "        \n",
    "        preds = self.model_pipe.predict(features)\n",
    "        out[:] = pd.Series(preds, index=self.assets).reindex(index=assets)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Factor Pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create pipeline with predictive factors and target returns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_ml_pipeline(alpha_factors, universe, lookback=21, lookahead=5):\n",
    "    \"\"\"Create pipeline with predictive factors and target returns\"\"\"\n",
    "    \n",
    "    # set up pipeline\n",
    "    pipe = OrderedDict()\n",
    "    \n",
    "    # Returns over lookahead days.\n",
    "    pipe['Returns'] = Returns(inputs=[USEquityPricing.open],\n",
    "                              mask=universe, \n",
    "                              window_length=lookahead + 1)\n",
    "    \n",
    "    # Rank alpha factors:\n",
    "    pipe.update({name: f().rank(mask=universe) \n",
    "                 for name, f in alpha_factors.items()})\n",
    "        \n",
    "    # ML factor gets `lookback` datapoints on each factor\n",
    "    pipe['ML'] = ML(inputs=pipe.values(),\n",
    "                    window_length=lookback + 1, \n",
    "                    mask=universe)\n",
    "    \n",
    "    return Pipeline(columns=pipe, screen=universe) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def initialize(context):\n",
    "    \"\"\"\n",
    "    Called once at the start of the algorithm.\n",
    "    \"\"\"   \n",
    "    set_slippage(slippage.FixedSlippage(spread=0.00))\n",
    "    set_commission(commission.PerShare(cost=0, min_trade_cost=0))\n",
    "    \n",
    "    schedule_function(rebalance_portfolio, \n",
    "                      TRADE_FREQ,\n",
    "                      time_rules.market_open(minutes=1))\n",
    "     \n",
    "    # Record tracking variables at the end of each day.\n",
    "    schedule_function(log_metrics, \n",
    "                      date_rules.every_day(),\n",
    "                      time_rules.market_close())\n",
    "\n",
    "    # Set up universe\n",
    "    # base_universe = AverageDollarVolume(window_length=63, mask=QTradableStocksUS()).percentile_between(80, 100)  \n",
    "    universe = AverageDollarVolume(window_length=63, mask=QTradableStocksUS()).percentile_between(40, 60)\n",
    "    \n",
    "    # create alpha factors and machine learning pipline\n",
    "    ml_pipeline = make_ml_pipeline(alpha_factors=make_alpha_factors(),\n",
    "                                   universe=universe, \n",
    "                                   lookback=TRAINING_PERIOD,\n",
    "                                   lookahead=HOLDING_PERIOD)\n",
    "    attach_pipeline(ml_pipeline, 'alpha_model')\n",
    "\n",
    "    attach_pipeline(risk_loading_pipeline(), 'risk_loading_pipeline')\n",
    "\n",
    "    context.past_predictions = {}\n",
    "    context.realized_rmse = 0\n",
    "    context.realized_ic = 0\n",
    "    context.long_short_spread = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Evaluate model performance using past predictions on hold-out data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_past_predictions(context):\n",
    "    \"\"\"Evaluate model performance using past predictions on hold-out data\"\"\"\n",
    "    # A day has passed, shift days and drop old ones\n",
    "    context.past_predictions = {k-1: v for k, v in context.past_predictions.items() if k-1 >= 0}\n",
    "\n",
    "    if 0 in context.past_predictions:\n",
    "        # Past predictions for the current day exist, so we can use todays' n-back returns to evaluate them\n",
    "        returns = pipeline_output('alpha_model')['Returns'].to_frame('returns')\n",
    "        \n",
    "        df = (context\n",
    "              .past_predictions[0]\n",
    "              .to_frame('predictions')\n",
    "              .join(returns, how='inner')\n",
    "              .dropna())\n",
    "\n",
    "        # Compute performance metrics\n",
    "        context.realized_rmse = metrics.mean_squared_error(y_true=df['returns'], y_pred=df.predictions)\n",
    "        context.realized_ic, _ = spearmanr(df['returns'], df.predictions)\n",
    "        log.info('rmse {:.2%} | ic {:.2%}'.format(context.realized_rmse, context.realized_ic))\n",
    "        \n",
    "        long_rets = df.loc[df.predictions >= df.predictions.median(), 'returns'].mean()\n",
    "        short_rets = df.loc[df.predictions < df.predictions.median(), 'returns'].mean()\n",
    "        context.long_short_spread = (long_rets - short_rets) * 100\n",
    "    \n",
    "    # Store current predictions\n",
    "    context.past_predictions[HOLDING_PERIOD] = context.predictions\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Algo Execution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare Trades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def before_trading_start(context, data):\n",
    "    \"\"\"\n",
    "    Called every day before market open.\n",
    "    \"\"\"\n",
    "    context.predictions = pipeline_output('alpha_model')['ML']\n",
    "    context.predictions.index.rename(['date', 'equity'], inplace=True)\n",
    "    context.risk_loading_pipeline = pipeline_output('risk_loading_pipeline')    \n",
    "    evaluate_past_predictions(context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Rebalance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rebalance_portfolio(context, data):\n",
    "    \"\"\"\n",
    "    Execute orders according to our schedule_function() timing. \n",
    "    \"\"\"\n",
    "    \n",
    "    predictions = context.predictions   \n",
    "    predictions = predictions.loc[data.can_trade(predictions.index)]\n",
    " \n",
    "    # Select long/short positions\n",
    "    n_positions = int(min(N_POSITIONS, len(predictions)) / 2)\n",
    "    to_trade = (predictions[predictions>0]\n",
    "                .nlargest(n_positions)\n",
    "                .append(predictions[predictions < 0]\n",
    "                        .nsmallest(n_positions)))\n",
    "\n",
    "    # Model may produce duplicate predictions\n",
    "    to_trade = to_trade[~to_trade.index.duplicated()]\n",
    "    \n",
    "    # Setup Optimization Objective\n",
    "    objective = opt.MaximizeAlpha(to_trade)\n",
    "\n",
    "    # Setup Optimization Constraints\n",
    "    constrain_gross_leverage = opt.MaxGrossExposure(1.0)\n",
    "    constrain_pos_size = opt.PositionConcentration.with_equal_bounds(-.02, .02)\n",
    "    market_neutral = opt.DollarNeutral()\n",
    "    constrain_risk = RiskModelExposure(\n",
    "        risk_model_loadings=context.risk_loading_pipeline,  \n",
    "        version=opt.Newest)\n",
    " \n",
    "    # Optimizer calculates portfolio weights and\n",
    "    # moves portfolio toward the target.\n",
    "    order_optimal_portfolio(\n",
    "        objective=objective,\n",
    "        constraints=[   \n",
    "            constrain_gross_leverage,\n",
    "            constrain_pos_size,\n",
    "            market_neutral,\n",
    "            constrain_risk\n",
    "        ],\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Track Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def log_metrics(context, data):\n",
    "    \"\"\"\n",
    "    Plot variables at the end of each day.\n",
    "    \"\"\"\n",
    "    record(leverage=context.account.leverage,\n",
    "           #num_positions=len(context.portfolio.positions),\n",
    "           realized_rmse=context.realized_rmse,\n",
    "           realized_ic=context.realized_ic,\n",
    "           long_short_spread=context.long_short_spread,\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
